# Session setup: clear the workspace, fix the RNG seed, and disable
# scientific notation in printed output.
rm(list = ls(all.names = TRUE))
gc()
set.seed(211917)
options(scipen = 999)

# Packages used by this script; any that are not installed are installed first.
packages <- c("tidyverse", "foreign", "stargazer", "naniar", "ri2")

new.packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) install.packages(new.packages)

# Attach with library() rather than require(): library() stops with an error
# when a package cannot be loaded, whereas require() only returns FALSE and
# lets the script fail later with a less informative message.
invisible(lapply(packages, library, character.only = TRUE))
rm(packages, new.packages)

# Replace the placeholder with the folder that contains data.dta.
setwd("PUT YOUR WORKING DIRECTORY HERE")

# load data
dat <- read.dta("./data.dta")

#---------------#
# Data cleaning #
#---------------#

# Preliminary recodes.
# Years married coded -9 are set to 0
# (presumably -9 is a "not applicable" code -- confirm against the codebook).
recode_rows <- which(dat$q6_years_married == -9)
dat$q6_years_married <- replace(dat$q6_years_married, recode_rows, 0)
rm(recode_rows)

# Treatment dummy: 1 if the respondent was assigned either endorser arm.
any_endorser <- dat$holy_endorser == 1 | dat$secular_endorser == 1
dat$treatment_binary <- ifelse(any_endorser, 1, 0)
rm(any_endorser)

## Keep the outcome, the treatment indicators and every candidate covariate.
dat <- select(
  dat,
  # outcome and treatment indicators
  q30_combined, treatment_binary, holy_endorser, secular_endorser,
  # respondent / enumerator characteristics
  respondent_muslim, respondent_male, enumerator_muslim, enumerator_male,
  # survey items
  q1_age, q2_male,
  q3_kamba, q3_kikuyu, q3_luo, q3_somali, q3_other,
  q4_religion_christian, q4_religion_muslim,
  q5_convert,
  q6_years_married, q6_married,
  q7_has_job,
  q8_income_last_month, q8_has_income,
  q9_chance_become_rich,
  q10_plans_to_vote,
  q11_govt_represents_interest,
  q12_feels_on_loosing_side,
  q13_a_knows_emigrant, q13_b_knows_emigrant_som_eri,
  q15_witnesses_interrel_violence,
  q16_witnesses_viol_musl_govt,
  q17_friend_family_died,
  q18_lost_job, q18_arrested, q18_house_of_worship_raided,
  q18_stopped_speaking_parents, q18_love_problems,
  q18_friend_left_country, q18_sum,
  q19_rlts_mother, q19_not_raised_by_mom, q19_mother_dead,
  q20_rlts_father, q20_not_raised_by_father, q20_father_dead,
  q21_respect_friends_family,
  q22_gender, q22_religion, q22_tribal, q22_national, q22_youth,
  q23_religious_attendance, q23_frequents_house_worship,
  q24_deontology,
  q25_payment_acceptable,
  q26_payment_ethical,
  q27_radicalization,
  q33_distance
)

## Visual overview of missingness across all selected columns.
vis_miss(dat)

## These four variables look problematic, i.e., they should not be imputed
## at this level; tabulate how many values are missing in each.
table(is.na(dat[["q13_a_knows_emigrant"]]))
table(is.na(dat[["q19_rlts_mother"]]))
table(is.na(dat[["q20_rlts_father"]]))
table(is.na(dat[["q23_religious_attendance"]]))

## Drop the problematic variables.
dat <- select(
  dat,
  -q13_a_knows_emigrant,
  -q19_rlts_mother,
  -q20_rlts_father,
  -q23_religious_attendance
)

## Re-inspect missingness after the drop.
vis_miss(dat)

## remove one missing treatment indicator (can't be imputed)
## which() drops rows where the comparison is NA (missing holy_endorser)
## as well as rows where it is FALSE (values <= -1).
dat <- dat[which(dat$holy_endorser > -1), ]

## impute rest of missings with the column mean
## Only numeric/logical columns are imputed: mean() is undefined for factor or
## character columns (it returns NA with a warning), so those are left as they
## are and any rows still incomplete are removed by complete.cases() below.
## NOTE(review): this loop also mean-imputes the outcome (q30_combined) and the
## remaining treatment columns if they contain NAs -- confirm this is intended.
for (i in seq_along(dat)) {
  if (is.numeric(dat[[i]]) || is.logical(dat[[i]])) {
    dat[is.na(dat[[i]]), i] <- mean(dat[[i]], na.rm = TRUE)
  }
}
rm(i)

## drop any rows that still contain missing values after imputation
dat <- dat[complete.cases(dat), ]

## Median split of social distance: 1 if strictly above the sample median,
## 0 otherwise. The original column is overwritten with the binary indicator.
above_median <- dat[["q33_distance"]] > median(dat[["q33_distance"]])
dat[["q33_distance"]] <- ifelse(above_median, 1, 0)
rm(above_median)
table(dat[["q33_distance"]])

## Interactions of each endorser arm with the binary distance indicator.
dat[["int_holy_dist"]] <- dat[["holy_endorser"]] * dat[["q33_distance"]]
dat[["int_sec_dist"]] <- dat[["secular_endorser"]] * dat[["q33_distance"]]

#----------------------#
# OLS without controls #
#----------------------#

## Baseline specification: both endorser arms, the distance indicator and
## their two interactions, with no additional covariates.
lm_two_primes_wo_controls <- lm(
  formula = q30_combined ~ int_holy_dist + int_sec_dist +
    holy_endorser + secular_endorser + q33_distance,
  data = dat
)

#-------------------#
# OLS with controls #
#-------------------#

## extract all possible controls
## Controls are selected by name rather than by column position, so the set
## stays correct if columns are added, dropped or reordered upstream. Every
## column that is not the outcome, a treatment indicator, the distance
## indicator or an interaction term (all of which are already spelled out in
## the formula below) counts as a control.
controls <- setdiff(
  colnames(dat),
  c("q30_combined", "treatment_binary", "holy_endorser", "secular_endorser",
    "q33_distance", "int_holy_dist", "int_sec_dist")
)

## drop enumerator FEs for now
controls <- setdiff(controls, c("enumerator_muslim", "enumerator_male"))

lm_two_primes_all_controls <- lm(paste("q30_combined ~ int_holy_dist + int_sec_dist + holy_endorser + secular_endorser + q33_distance + ", paste(controls, collapse=" + ")), data = dat)

#-------------------------------------#
# OLS with controls and fixed effects #
#-------------------------------------#

# set seed
# NOTE(review): nothing in this section draws random numbers; the call is kept
# so the RNG state seen by any later code is unchanged.
set.seed(12345)

## extract all possible controls
## Controls are selected by name rather than by column position, so the set
## stays correct if columns are added, dropped or reordered upstream. Unlike
## the previous model, the enumerator indicators (enumerator_muslim,
## enumerator_male) are kept here as the fixed effects.
controls <- setdiff(
  colnames(dat),
  c("q30_combined", "treatment_binary", "holy_endorser", "secular_endorser",
    "q33_distance", "int_holy_dist", "int_sec_dist")
)

lm_two_primes_all_controls_fe <- lm(paste("q30_combined ~ int_holy_dist + int_sec_dist + holy_endorser + secular_endorser + q33_distance + ", paste(controls, collapse=" + ")), data = dat)


## Regression table for the three specifications. Only the two interaction
## coefficients are shown (`keep`); the added lines record which models
## include controls and fixed effects. Replace the `out` placeholder with the
## path the table should be written to.
stargazer(lm_two_primes_wo_controls, lm_two_primes_all_controls, lm_two_primes_all_controls_fe,
          keep = c("int_holy_dist","int_sec_dist"),
          covariate.labels = c("Source: 'Religious' * Religious distance","Source: 'Some People' * Religious distance"),
          add.lines = list(c("Controls", "No", "Yes", "Yes"),
                           c("FEs", "No", "No", "Yes")),
          keep.stat = c("n", "adj.rsq"),
          digits = 3,
          dep.var.caption = "Support for violence",
          dep.var.labels = c("","",""),
          label = "tab:main",
          # title fixed: the original contained the line-break artifact "respon- dents'"
          title = "Effect of informing about an anti-violence norm's religious source and respondents' outgroup distance",
          out = "PUT YOUR FILE PATH HERE")



